22 May, 2017

suppressWarnings(library(tidyverse))
suppressWarnings(library(magrittr))
suppressWarnings(library(knitr))
suppressWarnings(library(broom))
suppressWarnings(library(arsRtools))

intron 1

deeptools_file_iasillo <- '/Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out/Refseqv3q1_intron1_5SS_sensitivity_joined.gz'

deeptools_file_meola <- '/Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out_meola/Refseqv3q1_intron1_5SS_sensitivity_joined.gz'

biotype <- 'intron1_RefseqNMq1'
anchor <- '5SS'

Deeptools to R

This part is not run here, just shown for Documentation. The resulting tidy data frame has been saved and is loaded in the next section.
code not run only shown for documentation

#!/bin/sh

cd /home/schmidm/faststorage/ClaudiaI/Effie_RNAseq_bws

bash ~/ms_tools/MS_Metagene_Tools/RNAseq_sensitivityX.sh reference-point \
"/home/schmidm/annotations/hg19/RefSeqGRCh37.3/ref_GRCh37.p5_top_level_geq3exons_NMq1intron1.bed" \
"*plus.bw" \
"*minus.bw" \
5000 5000 TSS intron1 "eGFP" "Ars2,Z18" \
"deeptools_out/Refseqv3q1_intron1_5SS_sensitivity" "--binSize=50 --numberOfProcessors=4"


cd /home/schmidm/faststorage/ClaudiaI/Effie_RNAseq_bws/Meola_RNAseq_bws/

bash ~/ms_tools/MS_Metagene_Tools/RNAseq_sensitivityX.sh reference-point \
"/home/schmidm/annotations/hg19/RefSeqGRCh37.3/ref_GRCh37.p5_top_level_geq3exons_NMq1intron1.bed" \
"*plus.bw" \
"*minus.bw" \
5000 5000 TSS intron1 "EGFP" "RRP40" \
"deeptools_out/Refseqv3q1_intron1_5SS_sensitivity" "--binSize=50 --numberOfProcessors=4"

matrix files for iasillo: /Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out/Refseqv3q1_intron1_5SS_sensitivity_joined.gz
matrix files for meola /Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out_meola/Refseqv3q1_intron1_5SS_sensitivity_joined.gz
biotype: intron1_RefseqNMq1
anchor: 5SS

code not run only shown for documentation

bin_values <- arsRtools::load_RNAseq_matrices(deeptools_file_iasillo, deeptools_file_meola)
bin_values_filename <- paste0('../data/RNAseq/RNAseq_bin_values_', biotype, '_', anchor, '.RData')

Saving bin values to file: ../data/RNAseq/RNAseq_bin_values_intron1_RefseqNMq1_5SS.RData

code not run only shown for documentation

save(bin_values, file=bin_values_filename)

Load the tidy data frame of single 50bp bin values

everything below is run.

load(bin_values_filename, verbose=TRUE)
## Loading objects:
##   bin_values

heatmaps scaled values

data('RNAseq_value_heatmap_theme', package='arsRtools')
row_order <- bin_values %>% 
    distinct(id, end, start) %>% 
    mutate(length = end-start) %>% 
    ungroup %>% 
    arrange(length) %$% 
    id

bin_values %>%
  filter(value > 0) %>%
  mutate(id = factor(id, levels=row_order)) %>%
  ggplot(., aes(x=rel_pos, y=id, fill=log2(value))) +
  geom_raster(interpolate = FALSE) +
  facet_grid(.~siRNA) +
  xlab(paste0('bp to ', biotype, ' ', anchor)) +
  RNAseq_value_heatmap_theme +
  theme(axis.text.y=element_blank())

are there blank lines

total ids considered:

distinct(bin_values, id) %>%
  nrow
## [1] 7233

empty:

empty_ids_intron1 <- bin_values %>%
  group_by(id) %>%
  summarize(sum_value = sum(value),
            max_value = max(value)) %>%
  filter(sum_value == 0 & max_value == 0) %$%
  id

length(empty_ids_intron1)
## [1] 705

save empty for later use …

save(empty_ids_intron1, file='../data/RNAseq/Refseq_multi_exonic_NMq1_empty_intron1_ids.RData')

log2 ratios

bin_sensitivities <- arsRtools::RNAseq_sensitivity_matrix(bin_values)

heatmaps log2FC

data('RNAseq_logFC_heatmap_theme', package='arsRtools')
bin_sensitivities %<>%
  left_join(., bin_values %>% 
                distinct(id, end, start) %>% 
                mutate(length = end-start,
                       length=round(length/50,0)*50,
                       dot_value = 1))

length_order <- bin_sensitivities %>% distinct(id, length) %>% arrange(length) %$% id
bin_sensitivities %<>% mutate(id = factor(id, levels=length_order)) %>% arrange(id)
bin_sensitivities %>% 
  mutate(value = case_when(.$value > 4 ~ 4,
                           .$value < .25 ~ .25,
                           TRUE ~ .$value)) %>%
  filter( !(id %in% empty_ids_intron1),
         value > 0,
         rel_pos > -2000, rel_pos < 5000) %>%
  ggplot(., aes(x=rel_pos, y=id, fill=log2(value))) +
  geom_raster(interpolate = FALSE) +
  facet_grid(.~siRNA) +
  RNAseq_logFC_heatmap_theme +
  theme(axis.text.y=element_blank()) + 
  xlim(-2000,5000) + 
  geom_tile(aes(x=length, y=id, fill=dot_value))

bin_sensitivities %>% 
  filter( siRNA == 'Ars2',
          !(id %in% empty_ids_intron1),
         value > 0) %>%
  ggplot(.) +
  geom_raster(aes(x=length, y=id, fill=dot_value), interpolate=FALSE) +
  theme(axis.text.y=element_blank()) +
  xlim(-2000,5000)

intron2

deeptools_file_iasillo <- '/Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out/Refseqv3q1_intron2_5SS_sensitivity_joined.gz'

deeptools_file_meola <- '/Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out_meola/Refseqv3q1_intron2_5SS_sensitivity_joined.gz'

biotype <- 'intron2_RefseqNMq1'
anchor <- '5SS'

Deeptools to R

This part is not run here, just shown for Documentation. The resulting tidy data frame has been saved and is loaded in the next section.
code not run only shown for documentation

#!/bin/sh

cd /home/schmidm/faststorage/ClaudiaI/Effie_RNAseq_bws

bash ~/ms_tools/MS_Metagene_Tools/RNAseq_sensitivityX.sh reference-point \
"/home/schmidm/annotations/hg19/RefSeqGRCh37.3/ref_GRCh37.p5_top_level_geq3exons_NMq1intron2.bed" \
"*plus.bw" \
"*minus.bw" \
5000 5000 TSS intron2 "eGFP" "Ars2,Z18" \
"deeptools_out/Refseqv3q1_intron2_5SS_sensitivity" "--binSize=50 --numberOfProcessors=4"


cd /home/schmidm/faststorage/ClaudiaI/Effie_RNAseq_bws/Meola_RNAseq_bws/

bash ~/ms_tools/MS_Metagene_Tools/RNAseq_sensitivityX.sh reference-point \
"/home/schmidm/annotations/hg19/RefSeqGRCh37.3/ref_GRCh37.p5_top_level_geq3exons_NMq1intron2.bed" \
"*plus.bw" \
"*minus.bw" \
5000 5000 TSS intron2 "EGFP" "RRP40" \
"deeptools_out/Refseqv3q1_intron2_5SS_sensitivity" "--binSize=50 --numberOfProcessors=4"

matrix files for iasillo: /Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out/Refseqv3q1_intron2_5SS_sensitivity_joined.gz
matrix files for meola /Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out_meola/Refseqv3q1_intron2_5SS_sensitivity_joined.gz
biotype: intron2_RefseqNMq1
anchor: 5SS

code not run only shown for documentation

bin_values <- arsRtools::load_RNAseq_matrices(deeptools_file_iasillo, deeptools_file_meola)
bin_values_filename <- paste0('../data/RNAseq/RNAseq_bin_values_', biotype, '_', anchor, '.RData')

Saving bin values to file: ../data/RNAseq/RNAseq_bin_values_intron2_RefseqNMq1_5SS.RData

code not run only shown for documentation

save(bin_values, file=bin_values_filename)

Load the tidy data frame of single 50bp bin values

everything below is run.

load(bin_values_filename, verbose=TRUE)
## Loading objects:
##   bin_values

heatmaps scaled values

row_order <- bin_values %>% 
    distinct(id, end, start) %>% 
    mutate(length = end-start) %>% 
    ungroup %>% 
    arrange(length) %$% 
    id

bin_values %>%
  filter(value > 0) %>%
  mutate(id = factor(id, levels=row_order)) %>%
  ggplot(., aes(x=rel_pos, y=id, fill=log2(value))) +
  geom_raster(interpolate = FALSE) +
  facet_grid(.~siRNA) +
  xlab(paste0('bp to ', biotype, ' ', anchor)) +
  RNAseq_value_heatmap_theme +
  theme(axis.text.y=element_blank())

are there blank lines

total ids considered:

distinct(bin_values, id) %>%
  nrow
## [1] 7233

empty:

empty_ids_intron2 <- bin_values %>%
  group_by(id) %>%
  summarize(sum_value = sum(value),
            max_value = max(value)) %>%
  filter(sum_value == 0 & max_value == 0) %$%
  id

length(empty_ids_intron2)
## [1] 688

save empty for later use …

save(empty_ids_intron2, file='../data/RNAseq/Refseq_multi_exonic_NMq1_empty_intron2_ids.RData')

log2 ratios

bin_sensitivities <- arsRtools::RNAseq_sensitivity_matrix(bin_values)

heatmaps log2FC

bin_sensitivities %<>%
  left_join(., bin_values %>% 
                distinct(id, end, start) %>% 
                mutate(length = end-start,
                       length=round(length/50,0)*50,
                       dot_value = 1))

length_order <- bin_sensitivities %>% distinct(id, length) %>% arrange(length) %$% id
bin_sensitivities %<>% mutate(id = factor(id, levels=length_order)) %>% arrange(id)
bin_sensitivities %>% 
  mutate(value = case_when(.$value > 4 ~ 4,
                           .$value < .25 ~ .25,
                           TRUE ~ .$value)) %>%
  filter( !(id %in% empty_ids_intron2),
         value > 0,
         rel_pos > -2000, rel_pos < 5000) %>%
  ggplot(., aes(x=rel_pos, y=id, fill=log2(value))) +
  geom_raster(interpolate = FALSE) +
  facet_grid(.~siRNA) +
  RNAseq_logFC_heatmap_theme +
  theme(axis.text.y=element_blank()) + 
  xlim(-2000,5000) + 
  geom_tile(aes(x=length, y=id, fill=dot_value))

bin_sensitivities %>% 
  filter( siRNA == 'Ars2',
          !(id %in% empty_ids_intron2),
         value > 0) %>%
  ggplot(.) +
  geom_raster(aes(x=length, y=id, fill=dot_value), interpolate=FALSE) +
  theme(axis.text.y=element_blank()) +
  xlim(-2000,5000)

sessionInfo()
## R version 3.2.3 (2015-12-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.11.6 (El Capitan)
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] arsRtools_0.1        RColorBrewer_1.1-2   rtracklayer_1.30.4  
##  [4] GenomicRanges_1.22.4 GenomeInfoDb_1.6.3   IRanges_2.4.8       
##  [7] S4Vectors_0.8.11     BiocGenerics_0.16.1  broom_0.4.1         
## [10] knitr_1.15.1         magrittr_1.5         dplyr_0.5.0         
## [13] purrr_0.2.2          readr_1.0.0          tidyr_0.6.0         
## [16] tibble_1.2           ggplot2_2.2.0        tidyverse_1.0.0     
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.8                futile.logger_1.4.3       
##  [3] XVector_0.10.0             plyr_1.8.4                
##  [5] futile.options_1.0.0       bitops_1.0-6              
##  [7] zlibbioc_1.16.0            tools_3.2.3               
##  [9] digest_0.6.10              evaluate_0.10             
## [11] gtable_0.2.0               nlme_3.1-128              
## [13] lattice_0.20-34            psych_1.6.9               
## [15] DBI_0.5-1                  yaml_2.1.14               
## [17] stringr_1.1.0              Biostrings_2.38.4         
## [19] rprojroot_1.1              grid_3.2.3                
## [21] Biobase_2.30.0             R6_2.2.0                  
## [23] BiocParallel_1.4.3         XML_3.98-1.5              
## [25] foreign_0.8-67             rmarkdown_1.2             
## [27] lambda.r_1.1.9             reshape2_1.4.2            
## [29] GenomicAlignments_1.6.3    Rsamtools_1.22.0          
## [31] backports_1.0.4            scales_0.4.1              
## [33] htmltools_0.3.5            SummarizedExperiment_1.0.2
## [35] assertthat_0.1             mnormt_1.5-5              
## [37] colorspace_1.3-2           labeling_0.3              
## [39] stringi_1.1.2              RCurl_1.95-4.8            
## [41] lazyeval_0.2.0             munsell_0.4.3